#!/usr/bin/env python
# -*- coding: utf-8 -*- 
#methods to write the corpus to text files
#writes both the raw URL text and the processed corpus

import sys
import os
# corpus_writer is imported so that its __file__ attribute can be used to derive PATH below.
import corpus_writer
# PATH is the part of corpus_writer's file path that precedes the first "/util"
# (the whole path if "/util" does not occur in it).
# Presumably this is the project root directory -- TODO confirm.
PATH = corpus_writer.__file__.split("/util")[0]
# Add PATH's "objects" folder to the import search path (note the Windows-style separator).
# This happens before `import utility`, so the order of these statements matters.
sys.path.append(PATH + "\\objects")
import utility as util
# NOTE(review): sys is already imported at the top of the file; this repeat is redundant but harmless.
import sys
# NOTE(review): util.PATH is defined in utility and is not visible here; if it equals PATH,
# this appends the same entry to sys.path a second time -- confirm.
sys.path.append(util.PATH + "\\objects")

def write_urls(url, path = PATH + "\\corpora\\url_corpora"):
    """Write the text of every transcript found at a CHILDES url to disk.

    The transcripts are obtained from util.url_to_phon_transcripts(url).
    One sub-directory is created under `path`, named
    "<author>_<subject>" after the first transcript (with any "/" in the
    author replaced by "_"), and each transcript's `text` is written to
    "<title>.txt" inside it (with any "/" in the title replaced by "_").

    Parameters:
        url  -- the url handed to util.url_to_phon_transcripts.
        path -- base directory for the output; created if it is missing.

    Returns:
        None. If no transcripts are returned, only the base directory is
        created and nothing else is written.

    Existing directories are reused, so the function can be run again for
    the same corpus; files with the same title are overwritten.
    """
    #get all of the transcripts
    transcripts = util.url_to_phon_transcripts(url)
    #first, check whether the corpora directory already exists; if not, make it
    if not os.path.isdir(path):
        os.makedirs(path)
    #nothing to name the corpus directory after, and nothing to write
    if not transcripts:
        return
    #the corpus directory is specific to the researcher and child subject
    first = transcripts[0]
    corpus_name = first.author.replace("/", "_") + "_" + first.subject
    #os.path.join yields the same "\\"-separated path on Windows, and a usable one elsewhere
    corpus_path = os.path.join(path, corpus_name)
    #only create it when missing, so that re-running does not raise OSError
    if not os.path.isdir(corpus_path):
        os.makedirs(corpus_path)
    for transcript in transcripts:
        file_name = transcript.title.replace("/", "_") + ".txt"
        #the with-block closes the file even if the write fails
        with open(os.path.join(corpus_path, file_name), 'w') as url_file:
            #text followed by a newline, matching what `print >>url_file` emitted
            url_file.write("%s\n" % (transcript.text,))


    
    
    
    
        
        
    
